---
title: "External Validity: Framework, Design and Analysis"
subtitle: "Supplementary Materials (Appendix 1) Analyses"
author: "Naoki Egami and Erin Hartman"
date: \today
output:
  html_document:
    df_print: paged
---

```{r setup, include=FALSE}
rm(list = ls())
knitr::opts_chunk$set(echo = TRUE)
knitr::opts_chunk$set(warning = FALSE)
knitr::opts_chunk$set(message = FALSE)
```

```{r load_libraries, include = FALSE}
## Load the evalid functions from Github.  At time of acceptance, the repository was at commit
## `f4d81f0e8dfc58f6fca8e9e3ac272bb30470cc6d` which is loaded below.

if(!require(evalid)) {
  library(devtools)
  install_github("naoki-egami/evalid@f4d81f0e8dfc58f6fca8e9e3ac272bb30470cc6d", dependencies = TRUE)
}
library(evalid)

library(grf)
library(foreign)
library(lmtest)
library(sandwich)
library(survey)
library(MASS)
library(parallel)
library(estimatr)
library(bartCause)
library(tidyverse)
library(knitr)
library(kableExtra)
library(estimatr)
library(ri)
```

## System Information
```{r}
print(Sys.time())

start_time <- Sys.time()

print(sessionInfo())
```

```{r}
# Generate folders if they do not exist
if(!file.exists("./generated/appendix_1")) dir.create("./generated/appendix_1")
if(!file.exists("./generated/Figures")) dir.create("./generated/Figures")
if(!file.exists("./generated/Tables")) dir.create("./generated/Tables")
```

```{r}
set.seed(29384)
```


# Supplementary Materials Section C

## Section C1: Field Experiment: Broockman and Kalla (2016)

Load Broockman and Kalla (2016) data.

```{r bk_load_data, include = TRUE}
load("./generated/data/recoded_broockman_kalla_data.RData")

## covariates used for estimation
covariates <- c('vf_female', 'vf_black', 'vf_white', # leaving out 'vf_hispanic', 
                        # 'vf_vg_14', 'vf_vg_12', # 'vf_vg_10', 
                        # 'ideology_t0', 'religious_t0', 'pid_t0')
                        names(sample)[str_detect(names(sample), "ideology_t0_factor|religious_t0_factor|pid_t0_factor|vf_age_bucket")][-c(1:4)])

# regression controls used in original analysis  
control_vars <- c('miami_trans_law_t0', 'miami_trans_law2_t0', 'therm_trans_t0', 
  'gender_norms_sexchange_t0', 'gender_norms_moral_t0', 'gender_norms_abnormal_t0',
  'ssm_t0', 'therm_obama_t0', 'therm_gay_t0','vf_democrat', 'ideology_t0', 
  'religious_t0', 'exposure_gay_t0', 'exposure_trans_t0', 'pid_t0', 'sdo_scale',
  'gender_norm_daugher_t0', 'gender_norm_looks_t0', 
  'gender_norm_rights_t0', 'therm_afams_t0', 'vf_female', 'vf_hispanic',
  'vf_black', 'vf_age', 'survey_language_es', 'cluster_level_t0_scale_mean')

nboot = 1000
```

### T-validity

We estimate the T-PATE by canvasser identity.

```{r canvasser_id_analysis, fig.width=10, fig.height = 7, results = 'hide', message = FALSE}

plot_data_comp <- NULL

for(i in 1:4) {
  # run for canvasser identity and outcome period
  for(trans_canvasser in c(-1, 0, 1)) {
    cat("\ni: ", i, "\n\ntranscanvasser: ", trans_canvasser, "\n")
    outcome <- paste0("trans.tolerance.dv.t", i)
    
    ## subset to canvasser identity
    if(trans_canvasser >= 0) {
      exp_data <- sample %>% filter(!is.na(sample[, outcome])) %>%
      filter(canvasser_trans == trans_canvasser)  
    } else {
      ## pooled across canvasser identity
      exp_data <- sample %>% filter(!is.na(sample[, outcome]))
    }
    
    # generate formulas for analysis
    formula_outcome <- as.formula(paste0(outcome, " ~ ", paste0(covariates, collapse = " + ")))
    formula_outcome_wls <- as.formula(paste0(outcome, " ~ ", paste0(control_vars, collapse = " + ")))
    formula_weights <- as.formula(paste0("S ~ ", paste0(covariates, collapse = " + ")))
    
    ## Sate estimate
    sate_est <- data.frame(estimator = "SATE-controls",  type = "SATE",
                           subgroup = as.character(trans_canvasser), time = i,
                       tpate(formula_outcome = formula_outcome_wls,
                               formula_weights = formula_weights,
                               exp_data = exp_data, pop_data = exp_data,
                               treatment = "treat_ind", 
                               id_cluster = exp_data$block_ind,
                               est_type = "wls", weights_type = "calibration",
                               sims = nboot,
                               compute_sate = FALSE)$tpate[c("est", "se", "ci_lower", "ci_upper")] %>% data.frame())
    
    if(i < 3 & trans_canvasser == 1) {
      ## Calibration fails among transgender canvassers
      ## So not calibrating on conservative
      formula_weights <- as.formula(paste0("S ~ ", paste0(covariates[-8], collapse = " + ")))  
    }
    if(i >= 3 & trans_canvasser == 1) {
      ## Calibration fails among transgender canvassers
      ## So not calibrating on conservative
      formula_weights <- as.formula(paste0("S ~ ", paste0(covariates[-3], collapse = " + ")))  
    }
    if(i == 4 & trans_canvasser == 1) {
      ## Calibration fails in wave 4 among transgender canvassers
      ## So only calibrating on white/non-white
      formula_weights <- as.formula(paste0("S ~ ", paste0(covariates[-3], collapse = " + ")))  
    }
    
    # WLS estimate
    wls_cal <- data.frame(estimator = "wls-cal",  type = "weighting",
                          subgroup = as.character(trans_canvasser), time = i,
                       tpate(formula_outcome = formula_outcome_wls,
                             formula_weights = formula_weights,
                             exp_data = exp_data, pop_data = pop,
                             treatment = "treat_ind", 
                             id_cluster = exp_data$block_ind,
                             est_type = "wls", weights_type = "calibration",
                             sims = nboot,
                             weights_max = 10,
                             compute_sate = FALSE)$tpate[c("est", "se", "ci_lower", "ci_upper")]
                       %>% data.frame())
    
    plot_data_comp <- bind_rows(plot_data_comp,
                                sate_est,
                                wls_cal)
    
  }
}
  

plot_data_comp$subgroup_recode = factor(plot_data_comp$subgroup, levels = c(-1, 0, 1), labels = c("All", "Non-Transgender Canvasser", "Transgender Canvasser"))

plot_data_comp$time_name = factor(plot_data_comp$time, labels = c("+3 Days", "+3 Weeks", "+6 Weeks", "+3 Months"))

plot_data_comp$estimator_type = factor(case_when(str_detect(plot_data_comp$type, "SATE") ~ "SATE",
                                         str_detect(plot_data_comp$type, "outcome") ~ "T-PATE:\nOutcome-based Estimator",
                                         str_detect(plot_data_comp$type, "doubly robust") ~ "T-PATE:\nDoubly Robust Estimator",
                                         TRUE ~ "T-PATE:\nWeighting-based Estimator"),
                                        levels = c("SATE", "T-PATE:\nWeighting-based Estimator",
                                                   "T-PATE:\nOutcome-based Estimator",
                                                   "T-PATE:\nDoubly Robust Estimator"))
  
plot_data_comp$estimator_name = factor(case_when(plot_data_comp$estimator == "SATE-controls" ~ "SATE:\nOLS",
                                                 plot_data_comp$estimator == "ipw-cal" ~ "T-PATE:\nwIPW",
                                                 plot_data_comp$estimator == "wls-cal" ~ "T-PATE:\nwLS",
                                                 plot_data_comp$estimator == "OLS-proj" ~ "T-PATE:\nOLS\nProjection",
                                                 plot_data_comp$estimator == "DR-OLS-cal" ~ "T-PATE\nAIPW\nOLS"), levels = c("SATE:\nOLS", "T-PATE:\nwLS", "T-PATE:\nwIPW", "T-PATE:\nOLS\nProjection", "T-PATE\nAIPW\nOLS"))

save(plot_data_comp, file = paste0("./generated/appendix_1/results_bk_canvasser_id.RData"))
```

Generate Figure A1.
```{r plot_canvasser_id_results, echo = FALSE, fig.width = 8, fig.height = 6, fig.align='center'}
plot_data_comp %>%
  ggplot(aes(x = time_name, y = est, color = estimator_name)) +
  geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), position = position_dodge(width = 0.4)) +
  geom_point(aes(y = est), position = position_dodge(width = 0.4)) +
  theme_bw() +
  xlab("Days After Canvassing") +
  ylab("Estimated Causal Effects on Transgender Tolerance Scale") +
  ggtitle("T-PATE Estimates by Canvasser Identity") +
  theme(legend.position="bottom") +
  geom_hline(yintercept = 0) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  facet_grid(~ subgroup_recode) +
  labs(color = "Estimator")

ggsave("./generated/Figures/Figure_A1_Broockman_Kalla_Canvass_Id.pdf", width = 8, height = 4.5)
```

```{r numerics_figure_A1, include = FALSE, eval = TRUE}
## Outputs numeric code to "Appendix_A1_numerics.tex"
row.names(plot_data_comp) <- NULL
plot_data_comp$estimator_type <- as.character(plot_data_comp$estimator_type)
plot_data_comp$estimator_type <- gsub("\n", " ", plot_data_comp$estimator_type)

cat(
  plot_data_comp %>%
      arrange(subgroup_recode, time) %>%
  select(time_name, estimator_type, est, se, ci_lower, ci_upper) %>%
    mutate(ci = paste0("[", round(ci_lower, 3), ", ", round(ci_upper, 3), "]")) %>%
    select(-ci_lower, -ci_upper) %>%
  kable(caption = "Numeric Values for T-PATE Estimates for Broockman and Kalla (2016) by Canvasser Identity in Figure A1.",
        booktabs = T,
        col.names = c("Time Period", "Estimator", "Estimate", "SE", "95% CI"),
        digits = 3,
        row.names = NA,
        format = "latex",
        align = c("l", "l", "r", "r", "c")) %>%
  kable_styling(latex_options = c("hold_position")) %>%
  pack_rows("All", 1, 8) %>%
  pack_rows("Non-Transgender Canvasser", 9, 16, latex_gap_space = "1em") %>%
  pack_rows("Transgender Canvasser", 17, 24, latex_gap_space = "1em"),
  file = "./generated/Tables/Appendix_A1_numerics.tex")
```

Sign-generalization of T-validity.
```{r bk_t_validity, include = TRUE}
field_t_validity <- plot_data_comp %>% filter(subgroup != "all") %>%
  group_by(estimator) %>%
  do(pct(1 - pnorm(.$est / .$se))) %>%
  summarize(thresh = max(threshold[p_value < 0.05]),
            thresh = ifelse(is.finite(thresh), thresh, 0),
            p_value = p_value[threshold == thresh]) %>%
  arrange(estimator)
```

### C-validity
```{r bk_c_validity, include = TRUE, eval = FALSE}

covariates_expt_attrition <- c("vf_female", "vf_black", "vf_hispanic", "vf_vg_14", "vf_vg_12", "vf_vg_10",
                               "vf_democrat", "vf_republican",
                               names(full_data)[str_detect(names(full_data), "vf_age_bucket")])

# miami_trans_law_withdef_t3
# miami_trans_law2_withdef_t3
# miami_trans_law_withdef_t4
# miami_trans_law2_withdef_t4

## Using the averages across the two laws used in the original analysis
do_tpate_est <- function(outcome, estimator, controls = NULL, sate_controls = FALSE) {
    
  ## removing rows with missing outcome
  exp_data <- sample[!is.na(sample[, outcome]), ]
  
  ## weighting formula
  formula_weights <- as.formula(paste0("S ~ ", paste0(covariates_expt_attrition, collapse = " + ")))
  
  ## if doing wls with controls, replace outcome controls
  if(!is.null(controls)) covariates_expt_attrition <- controls
  
  ## outcome formula
  formula_outcome <- as.formula(paste0(outcome, " ~ ",
                                       paste0(covariates_expt_attrition, collapse = " + ")))
  
  ## if doing sate with controls, replace full_data with exp_data
  if(sate_controls) full_data <- exp_data
  
  res <- data.frame(estimator = estimator, outcome = outcome, 
                        tpate(formula_outcome = formula_outcome,
                              formula_weights = formula_weights,
                              exp_data = exp_data, pop_data = full_data,
                              treatment = "treat_ind",
                              id_cluster = exp_data$block_ind,
                              weights_type = "calibration",
                              est_type = estimator,
                              sims = nboot,
                              weights_max = 10,
                              compute_sate = FALSE)$tpate[c("est", "se", "ci_lower", "ci_upper")] %>% data.frame())
  return(res)
}

## Sign-generalization for PATE
law_support_wls <- bind_rows(
  do_tpate_est("miami_trans_law_t3_avg", "wls", controls = control_vars),
  do_tpate_est("trans_law_post_ad_t3", "wls", controls = control_vars),
  do_tpate_est("miami_trans_law_t4_avg", "wls", controls = control_vars))

pct(1 - pnorm(law_support_wls$est/law_support_wls$se))

## Sign-generalization for SATE
law_support_unwtd <- bind_rows(
  do_tpate_est("miami_trans_law_t3_avg", "wls", controls = control_vars, sate_controls = TRUE),
  do_tpate_est("trans_law_post_ad_t3", "wls", controls = control_vars, sate_controls = TRUE),
  do_tpate_est("miami_trans_law_t4_avg", "wls", controls = control_vars, sate_controls = TRUE))

pct(1 - pnorm(law_support_unwtd$est/law_support_unwtd$se))

save(law_support_wls, law_support_unwtd, file = paste0("./generated/appendix_1/results_bk_c_validity.RData"))
```

## Section C2: Survey Experiment: Partisan-Motivated Reasoning

### C-validity

```{r Bisgaard_load_data, include = TRUE}
load("./generated/data/study1.RData")
load("./generated/data/study2.RData")
load("./generated/data/study3.RData")
load("./generated/data/study4.RData")
```

Recode the data, same as in main manuscript.
```{r Bisgaard_functions, include = TRUE}
lm_analysis <- function(study, outcome_var, gov = TRUE) {
  study <- recode_study(study, outcome_var, gov)
  mod_fit <- lm_robust(outcome ~ treatment, data = study)
  return(mod_fit)
}

recode_study <- function(study, outcome_var, is_gov_supporter) {
  if(!("resp1" %in% names(study))) {
    study$resp1 <- rep(NA, nrow(study))
  }
  if(!("c1raw" %in% names(study))) {
    study$c1raw <- rep(NA, nrow(study))
  }
  if(!("obamaspend" %in% names(study))) {
    study$obamaspend <- rep(NA, nrow(study))
    study$obamaleader <- rep(NA, nrow(study))
    study$congress <- rep(NA, nrow(study))
    study$fed <- rep(NA, nrow(study))
    study$wallst <- rep(NA, nrow(study))
    study$export <- rep(NA, nrow(study))
  }
  
  study <- study %>%
    filter(!is.na(gov)) %>%
    filter(gov == as.numeric(is_gov_supporter)) %>%
    filter(treatment %in% c("pos", "neg")) %>%
    mutate(resp1 = case_when(is.na(resp1) ~ 0.5,
                             TRUE ~ as.numeric(resp1)),
           c1 = case_when(c1raw == 0 ~ 1,
                          c1raw == 3 ~ 2,
                          c1raw == 15 ~ 2,
                          c1raw == -1 ~ 1,
                          TRUE ~ as.numeric(c1raw)) - 1,
           obamaspend = obamaspend - 4,
           obamaleader = obamaleader - 4,
           congress = -1 * (congress - 4),
           fed = -1 * (fed - 4),
           wallst = -1 * (wallst - 4),
           export = -1 * (export - 4)) %>%
    mutate(treatment = as.numeric(treatment == "neg"),
           outcome = get(outcome_var))
  return(study)
}
```

Analysis among opposition supporters.
```{r Bisgaard_opposition_analysis_opp, include = TRUE}
## For Opposition Supporters
opp_results <- list(
    # (1) Close, Study 1 in the US 
    lm_analysis(study1, "resp1", 0),
    # (2) Close, Study 2 in Denmark 
    lm_analysis(study2, "resp1", 0),
    # (3) Close, Study 3 in Denmark 
    lm_analysis(study3, "resp1", 0),
    # (4) Open, Study 1 in the US 
    lm_analysis(study1, "c1", 0),
    # (5) Open, Study 3 in Denmark 
    lm_analysis(study3, "c1", 0),
    # (6) Open, Study 4 in Denmark
    lm_analysis(study4, "c1", 0),
    # (7) Argument/Pro -- obamaspend, Study 1 in the US
    lm_analysis(study1, "obamaspend", 0),
    # (8) Argument/Pro -- obamaleader, Study 1 in the US
    lm_analysis(study1, "obamaleader", 0),
    # (9) Argument/Con -- congress, Study 1 in the US
    lm_analysis(study1, "congress", 0),
    # (10) Argument/Con -- fed, Study 1 in the US
    lm_analysis(study1, "fed", 0),
    # (11) Argument/Con -- wallst, Study 1 in the US
    lm_analysis(study1, "wallst", 0),
    # (12) Argument/Con -- export, Study 1 in the US
    lm_analysis(study1, "export", 0)
)

opp_results <- lapply(opp_results, function(x) (tidy(x) %>% filter(term == "treatment"))[, c("estimate", "std.error")]) %>% bind_rows()

opp_pct <- data.frame(type = "(H2) Opposition Supporters", pct(1 - pnorm(opp_results$estimate/opp_results$std.error)))
```

Analysis among government supporters.
```{r Bisgaard_opposition_analysis_gov, include = TRUE}
## For Government Supporters
gov_results <- list(
    # (1) Close, Study 1 in the US 
    lm_analysis(study1, "resp1", 1),
    # (2) Close, Study 2 in Denmark 
    lm_analysis(study2, "resp1", 1),
    # (3) Close, Study 3 in Denmark 
    lm_analysis(study3, "resp1", 1),
    # (4) Open, Study 1 in the US 
    lm_analysis(study1, "c1", 1),
    # (5) Open, Study 3 in Denmark 
    lm_analysis(study3, "c1", 1),
    # (6) Open, Study 4 in Denmark
    lm_analysis(study4, "c1", 1),
    # (7) Argument/Pro -- obamaspend, Study 1 in the US
    lm_analysis(study1, "obamaspend", 1),
    # (8) Argument/Pro -- obamaleader, Study 1 in the US
    lm_analysis(study1, "obamaleader", 1),
    # (9) Argument/Con -- congress, Study 1 in the US
    lm_analysis(study1, "congress", 1),
    # (10) Argument/Con -- fed, Study 1 in the US
    lm_analysis(study1, "fed", 1),
    # (11) Argument/Con -- wallst, Study 1 in the US
    lm_analysis(study1, "wallst", 1),
    # (12) Argument/Con -- export, Study 1 in the US
    lm_analysis(study1, "export", 1)
)

gov_results <- lapply(gov_results, function(x) (tidy(x) %>% filter(term == "treatment"))[, c("estimate", "std.error")]) %>% bind_rows()

gov_pct <- data.frame(type = "(H1) Government Supporters", pct(pnorm(gov_results$estimate/gov_results$std.error)))
```

```{r bisgaard_color_results, include = TRUE}
res_bisgaard <- bind_rows(opp_pct, gov_pct) %>%
  mutate(Yval = case_when(
    h_num %in% c(1, 2, 3) ~ "Open-Ended",
    h_num %in% c(4, 5, 6) ~ "Close-Ended",
    TRUE ~ "Argument Rating"),
    Cval = case_when(
      h_num %in% c(1, 4, 7, 8, 9, 10, 11, 12) ~ "United States",
      h_num %in% c(2) ~ "Denmark (Center-left)",
      TRUE ~ "Denmark (Center-right)"
    ))

# Rename
res_bisgaard$type <- as.character(res_bisgaard$type)
res_bisgaard$type[res_bisgaard$type == "(H1) Government Supporters"] <- "(H1) Incumbent Supporters"
res_bisgaard$type <- as.factor(res_bisgaard$type)
res_bisgaard <- res_bisgaard %>%
  arrange(type, threshold)

save(res_bisgaard, file = paste0("./generated/appendix_1/bisgaard_results.RData"))
```

Generate Figure A2.
```{r}
res_plot <- ggplot(res_bisgaard, aes(x = threshold, y = p_value, color = Cval, shape = Yval)) + geom_point() + 
  ylim(c(0, 0.3)) + geom_hline(yintercept = 0.05, linetype = 2) + 
  theme_bw() +
  xlab("Threshold") + ylab("Partial Conjunction p-value") +
  theme(axis.title.y = element_text(size = 10, margin = margin(t = 0, r = 10, b = 0, l = 0)),
        axis.title.x = element_text(size = 10, margin = margin(t = 10, r = 0, b = 0, l = 0)),
        legend.title=element_text(size=9)) +
  facet_wrap(~type) +
  # geom_text(x=2, y=0.28, label="8/12") + 
  scale_color_manual(guide = guide_legend(title = "Context"), values=c("orange", "red", "blue")) +
  scale_shape(guide = guide_legend(title = "Outcome")) +
  scale_x_continuous(breaks = c(2, 4, 6, 8, 10, 12)) +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major.x = element_blank())

res_plot

ggsave("./generated/Figures/Figure_A2_Bisgaard.pdf", width = 6, height = 2.5)
```

```{r numerics_figure_A2, include = FALSE, eval = TRUE}
## Outputs numeric code to "Appendix_A2_numerics.tex"
cat( "\n\n\n", 
  res_bisgaard %>%
       select(threshold, Yval, Cval, p_value) %>%
     kable(caption = "Numeric Values for Sign-Generalization Test for Bisgaard (2019) in Figure A2.",
        booktabs = T,
        col.names = c("Threshold", "Outcome Variation", "Context Variation", "P-value"),
        digits = 3,
        format = "latex") %>%
  pack_rows("(H1) Incumbent Supporters", 1, 12, latex_gap_space = "1em") %>%
    pack_rows("(H2) Opposition Supporters", 13, 24, latex_gap_space = "1em"),
     file = "./generated/Tables/Appendix_A2_numerics.tex")
```

## Section C3: Lab Experiment: Young (2019)

Create functions for analysis and load data.
```{r young_functions, include = TRUE}
# wLS function for analysis
wLS <- function(outcome_var, data, type = "TP", pop_weights = NULL){

  formula <- formula(paste0(outcome_var, " ~ treat_assign"))

  if(is.null(pop_weights)) {
    pop_weights <- rep(1, nrow(data))
  }

  # Setup
  ## Using probabilities from original author
  dat_in <- data %>% mutate(p_weights = pop_weights) %>%
    subset(treat_assign %in% c("C", type)) %>%
    mutate(treat_assign = as.numeric(treat_assign == type),
           outcome = get(outcome_var)) %>% subset(!is.na(outcome)) %>%
    mutate(inv_prob = 1/genprobexact(treat_all, blockvar = block) * p_weights) # generating probabilities of treatment by block

  mod_fit <- lm_robust(formula,
                       data = dat_in,
                       weights = dat_in$inv_prob)
  return(mod_fit)
}
```


```{r young_load_data, include = TRUE}
# load the data
load("./generated/data/young_2019.RData")

## create subset of actual wristband days for behavioral measure
datw <- subset(dat, dat$wristband_real==1)
```

Do analysis.
```{r young_analysis, include = TRUE}

## Hypothesis 1: People in a state of fear will express less dissent.

## Testing across: 12 indexes, behavioral (wristband) outcome, and electoral contexts

do_tests_1 <- c("prob_shirt_elec", "prob_joke_elec", "prob_meeting_elec", "prob_vh_elec", "prob_pungwe_elec", "prob_testify_elec",
              "prob_shirt_now", "prob_joke_now", "prob_meeting_now", "prob_vh_now", "prob_pungwe_now", "prob_testify_now")

hypoth1outcomes <- c(lapply(do_tests_1, function(x) wLS(x, data = dat, type = "TG")),
                  lapply(do_tests_1, function(x) wLS(x, data = dat, type = "TP")),
                  lapply("wristband", function(x) wLS(x, data = datw, type = "TG")),
                  lapply("wristband", function(x) wLS(x, data = datw, type = "TP")))

## Hypothesis 2: People in a state of fear will be more pessimistic about the risk of repression.

do_tests_2 <- c('prob_threat_now', 'prob_threat_elec', 'prob_beat_now', 'prob_beat_elec',
              'prob_abduct_now', 'prob_abduct_elec', 'prob_mdp_now', 'prob_mdp_elec',
              'prob_sexual_now', 'prob_sexual_elec', 'prob_kill_now', 'prob_kill_elec')

hypoth2outcomes <- c(lapply(do_tests_2, function(x) wLS(x, data = dat, type = "TG")),
                  lapply(do_tests_2, function(x) wLS(x, data = dat, type = "TP")))


## Hypothesis 3: People in a state of fear will be more pessimistic in their expectations of whether other opposition supporters will dissent.

do_tests_3 <- c("prob_shirt_oth_now", "prob_shirt_oth_elec", "prob_joke_oth_now", "prob_joke_oth_elec",
              "prob_meeting_oth_now", "prob_meeting_oth_elec", "prob_vh_oth_now", "prob_vh_oth_elec",
              "prob_pungwe_oth_now", 'prob_pungwe_oth_elec', 'prob_testify_oth_now', 'prob_testify_oth_elec')

hypoth3outcomes <- c(lapply(do_tests_3, function(x) wLS(x, data = dat, type = "TG")),
                  lapply(do_tests_3, function(x) wLS(x, data = dat, type = "TP")))

## Test across all time contexts, all treatments, all outcomes (index components and behavioral)
hypoth1_pct <- data.frame(hypothesis = "(H1) Indiv. Express\nLess Dissent", pct(unlist(lapply(hypoth1outcomes, function(x) pnorm(x$coefficients["treat_assign"]/x$std.error["treat_assign"]))))) %>%
  mutate(Tval = case_when(
    h_num %in% c(1:12, 25) ~ "General Fear",
    TRUE ~ "Political Fear"),
    Yval = case_when(h_num %in% c(1:6, 13:18) ~ "Survey/Hypothetical (Current)",
    h_num %in% c(25:26) ~ "Behavioral",
    TRUE ~ "Survey/Hypothetical (Future)"))
hypoth1_pct  %>%
  summarize(thresh = max(threshold[p_value < 0.05]),
            thresh = ifelse(is.finite(thresh), thresh, 0),
            n_tests = max(threshold))

hypoth2_pct <- data.frame(hypothesis = "(H2) Indiv. More Pessimistic\nAbout Risk of Repression", pct(unlist(lapply(hypoth2outcomes, function(x) pnorm(x$coefficients["treat_assign"]/x$std.error["treat_assign"], lower.tail = FALSE))))) %>%
  mutate(Tval = case_when(
    h_num %in% c(1:12) ~ "General Fear",
    TRUE ~ "Political Fear"),
    Yval = case_when(h_num %in% c(1:6, 13:18) ~ "Survey/Hypothetical (Current)",
    TRUE ~ "Survey/Hypothetical (Future)"))
hypoth2_pct  %>%
  summarize(thresh = max(threshold[p_value < 0.05]),
            thresh = ifelse(is.finite(thresh), thresh, 0),
            n_tests = max(threshold))

hypoth3_pct <- data.frame(hypothesis = "(H3) Indiv. More Pessimistic\nThat Others will Dissent", pct(unlist(lapply(hypoth3outcomes, function(x) pnorm(x$coefficients["treat_assign"]/x$std.error["treat_assign"]))))) %>%
  mutate(Tval = case_when(
    h_num %in% c(1:12) ~ "General Fear",
    TRUE ~ "Political Fear"),
    Yval = case_when(h_num %in% c(1:6, 13:18) ~ "Survey/Hypothetical (Current)",
    TRUE ~ "Survey/Hypothetical (Future)"))
hypoth3_pct %>%
  summarize(thresh = max(threshold[p_value < 0.05]),
            thresh = ifelse(is.finite(thresh), thresh, 0),
            n_tests = max(threshold))

res_young <- bind_rows(hypoth1_pct, hypoth2_pct, hypoth3_pct)

res_young$hypothesis <- as.character(res_young$hypothesis)

res_young$Yval[res_young$Yval == "Survey/Hypothetical (Current)"]  <- "Survey (Current)"
res_young$Yval[res_young$Yval == "Survey/Hypothetical (Future)"]  <- "Survey (Future)"
res_young$hypothesis[res_young$hypothesis == "(H1) Indiv. Express\nLess Dissent"]  <- "(H1) Express Less Dissent"
res_young$hypothesis[res_young$hypothesis == "(H2) Indiv. More Pessimistic\nAbout Risk of Repression"] <- "(H2) More Pessimistic \nAbout Risk of Repression"
res_young$hypothesis[res_young$hypothesis == "(H3) Indiv. More Pessimistic\nThat Others will Dissent"] <- "(H3) More Pessimistic\nThat Others will Dissent"

save(res_young, file = paste0("./generated/appendix_1/young_results.RData"))
```

Generate Figure A3.
```{r}

res_plot <-  ggplot(res_young, aes(x = threshold, y = p_value, color = Tval, shape = Yval)) + geom_point() +
  ylim(c(0, 0.4)) + geom_hline(yintercept = 0.05, linetype = 2) +
  theme_bw() +
  xlab("Threshold") + ylab("Partial Conjunction p-value") +
  theme(axis.title.y = element_text(size = 12, margin = margin(t = 0, r = 10, b = 0, l = 0)),
        axis.title.x = element_text(size = 12, margin = margin(t = 10, r = 0, b = 0, l = 0)),
        legend.title=element_text(size=10)) +
  facet_wrap(~hypothesis) +
  scale_color_manual(guide = guide_legend(title = "Treatment:", keywidth=0.15,
                 keyheight=0.1,
                 default.unit="inch"), values=c("#00cc6c", "#8237FF")) +
  scale_shape(guide = guide_legend(title = "Outcome:", keywidth=0.15,
                 keyheight=0.1,
                 default.unit="inch")) +
  scale_x_continuous(breaks = c(4, 8, 12, 16, 20, 24)) +
  theme(legend.position = "bottom",
        legend.direction = "horizontal",
        panel.grid.minor = element_blank(),
        panel.grid.major.x = element_blank(),
        strip.text.x = element_text(size = 10))

res_plot

ggsave("./generated/Figures/Figure_A3_Young.pdf", width = 8, height = 3.5)
```

```{r numerics_figure_A3, include = FALSE, eval = TRUE}
## Outputs numeric code to "Appendix_A3_numerics.tex"
row.names(res_young) <- NULL
res_young$threshold <- as.numeric(res_young$threshold)
cat( "\n\n\n", 
  res_young %>%
    mutate(hypothesis = as.character(hypothesis),
           hypothesis = str_replace(hypothesis, "\n", " "),
           hypothesis_number = substr(hypothesis, 1, 4)) %>%
    arrange(hypothesis, threshold) %>%
       dplyr::select(hypothesis_number, threshold, Tval, Yval, p_value) %>%
     kable(caption = "Numeric Values for Sign-Generalization Test for Young (2019) in Figure A3.",
        booktabs = T,
        col.names = c("Hypothesis", "Threshold", "Treatment Variation", "Outcome Variation", "P-value"),
        digits = 3,
        format = "latex",
        longtable = TRUE, 
        linesep = "") %>%
    kable_styling(latex_options = c("striped", "repeat_header", "scale_down"), stripe_index = c(1:26, 51:74)),
     file = "./generated/Tables/Appendix_A3_numerics.tex")
```


# Section D: Meta Keta

This analysis re-analyzes the Dunning et al. (2019) "Voter information campaigns and political accountability: Cumulative findings from a preregistered meta-analysis of coordinated trials" paper for sign generalization.  Footnote 9 in the Supplementary Materials describes how the original data was downloaded.

```{r metaketa_load_data}
data <- read_csv("./uploaded_data/MetaKeta 1 results - results.csv")
data <- data %>% select("Outcome", "Treatment", "Country", "Estimate", "SE")

data <- data %>% filter(!str_detect(Treatment, "Public") & !str_detect(Treatment, "Private")) %>%
  filter(Outcome %in% c("Vote Choice", "Turnout")) %>%
  mutate(Outcome = ifelse(Outcome == "Vote Choice", paste0("(H1) ", Outcome), paste0("(H2) ", Outcome)))

data$Outcome = factor(data$Outcome, levels = c("(H1) Vote Choice", "(H2) Turnout"))

## One sided p-vals for normatively positive direction
data <- mutate(data, t = ifelse(str_detect(Treatment, "Good") | str_detect(Outcome, "Backlash"), Estimate/SE, -Estimate/SE),
              p = 1 - pnorm(t))
```

Run PCT by outcome.
```{r meta_keta_do_pct, include = TRUE}
res_meta <- data %>% group_by(Outcome) %>%
  do( res = bind_cols(pct(.$p), .[pct(.$p)$h_num, ]))

res_meta <- res_meta %>% 
  select(-Outcome) %>% 
  unnest(c(res))

save(res_meta, file = paste0("./generated/appendix_1/meta_keta_results.RData"))
```

Generate Figure A4.
```{r include = TRUE, fig.width = 7, fig.height = 2.25}
res_plot <- ggplot(res_meta, aes(x = threshold, y = p_value, color = Country, shape = Treatment)) + geom_point() + 
  ylim(c(0, 1)) + geom_hline(yintercept = 0.05, linetype = 2) + 
  theme_bw() + facet_grid(~Outcome, scales = "free_x") +
  xlab("Threshold") + ylab("Partial Conjunction p-value") +
  theme(axis.title.y = element_text(size = 10, margin = margin(t = 0, r = 10, b = 0, l = 0)),
        axis.title.x = element_text(size = 10, margin = margin(t = 10, r = 0, b = 0, l = 0)),
        legend.title=element_text(size=9)) +
  scale_x_continuous(breaks = c(2, 4, 6, 8, 10, 12)) +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major.x = element_blank(),
        legend.position = "right",
        legend.box = "horizontal") +
  guides(shape = guide_legend(title = "Subgroup"))

res_plot
ggsave("./generated/Figures/Figure_A4_DunningEtAl.pdf", width = 6, height = 2.25)
```

```{r numerics_figure_A4, include = FALSE, eval = TRUE}
# Generate table for online appendix.
row.names(res_meta) <- NULL
cat( "\n\n\n", 
res_meta %>%
       select(threshold, Treatment, Country, p_value) %>%
     kable(caption = "Numeric Values for Sign-generalization test for Dunning et al. (2019) in Figure A4.",
        booktabs = T,
        col.names = c("Threshold", "Subgroup Variation", "Context Variation", "P-value"),
        digits = 3,
        format = "latex") %>%
  #kable_styling(latex_options = c("striped")) %>%
  pack_rows("(H1) Vote Choice", 1, 12) %>%
  pack_rows("(H2) Turnout", 13, 24),
     file = "./generated/Tables/Appendix_A4_numerics.tex")
```

Summarize results.
```{r}
res_meta %>% group_by(Outcome) %>%
summarize(mean_pct_p = mean(p_value < 0.05),
          sum_pct_p = sum(p_value < 0.05),
          mean_orig_p = mean(p < 0.05),
          sum_orig_p = sum(p < 0.05),
          n = n())
```

## Section E: External Validity of Observational Studies

This analysis re-analyzes the Dehejia et al. (2019) and Bisbee et al. (2017) paper for sign generalization.

```{r obs_study_load_data}
data <- read_csv("./uploaded_data/Dehejia_etal_TableA1.csv")
data_wb <- read_csv("./uploaded_data/Dehejia_etal_TableA1_country_codings.csv")
names(data_wb)[1] <- "Country"
data_iv <- read_csv("./uploaded_data/Bisbee_etal_TableA1 - IV Paper (Published).csv")

data <- bind_rows(data %>% select(Country, `Year of Census`, `Estimate for More Kids`, `SE for More Kids`) %>%
                    rename(Estimate = `Estimate for More Kids`, SE = `SE for More Kids`) %>%
                    mutate(outcome = "Have More Kids",
                           Study = "Natural Experiment"),
                  data %>% select(Country, `Year of Census`, `Est Economically Active`, `SE Economically Active`) %>%
                    rename(Estimate = `Est Economically Active`, SE = `SE Economically Active`) %>%
                    mutate(outcome = "Economically Active",
                           Study = "Natural Experiment"),
                  data_iv %>% select(Country, Year, IV, IV_SE) %>%
                    rename(Estimate = IV, SE = IV_SE, `Year of Census` = Year) %>%
                    mutate(outcome = "Economically Active",
                           Study = "IV"))

data <- filter(data, !is.na(Estimate) & SE != 0)
data <- mutate(data, t = ifelse(outcome == "Have More Kids", Estimate/SE, -Estimate/SE),
               p = 1 - pnorm(t),
               ci_lower = Estimate - 2*SE,
               ci_upper = Estimate + 2*SE)

data <- merge(data, data_wb %>% select(Country, `Income group`))
data$`Income group` = factor(data$`Income group`, levels = c("High income", "Upper middle income",
                                                                "Lower middle income", "Low income"))
data$outcome = factor(paste0("Outcome: ", data$outcome), levels = c("Outcome: Have More Kids", "Outcome: Economically Active"))
data$Study = factor(paste0("Study Type: ", data$Study), levels = c("Study Type: Natural Experiment", "Study Type: IV"))
```

Run partial conjunciton test analysis by study and outcome.
```{r obs_study_do_pct, include = TRUE}
res_obs <- data %>% group_by(outcome, Study) %>%
  do( res_obs = bind_cols(pct(.$p), .[pct(.$p)$h_num, ])) 

res_obs <- res_obs %>% 
  select(-outcome,-Study) %>% 
  unnest(c(res_obs))

res_obs$Decade = as.factor(floor(res_obs$`Year of Census`/10) * 10)

save(res_obs, file = paste0("./generated/appendix_1/obs_study_results.RData"))
```

Generate Figure A5.
```{r include = TRUE, fig.width = 7, fig.height = 2.5}
res_plot <- ggplot(res_obs %>% filter(Study == "Study Type: Natural Experiment"), aes(x = threshold, y = p_value, color = Decade, shape = `Income group`)) + geom_point() + 
  ylim(c(0, 1)) + geom_hline(yintercept = 0.05, linetype = 2) + 
  theme_bw() + facet_grid(.~outcome, scales = "free_x") +
  xlab("Threshold") + ylab("Partial Conjunction p-value") +
  theme(axis.title.y = element_text(size = 10, margin = margin(t = 0, r = 10, b = 0, l = 0)),
        axis.title.x = element_text(size = 10, margin = margin(t = 10, r = 0, b = 0, l = 0)),
        legend.title=element_text(size=9)) +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major.x = element_blank(),
        legend.position = "right",
        legend.box = "horizontal")

res_plot
ggsave("./generated/Figures/Figure_A5_DehejiaEtAl_pct.pdf", width = 7, height = 2.5)
```

```{r numerics_figure_A5, include = FALSE, eval = TRUE}
row.names(res_obs) <- NULL
cat( "\n\n\n", 
res_obs %>% filter(Study == "Study Type: Natural Experiment") %>%
       select(threshold, Decade, `Year of Census`, `Income group`, Country, p_value) %>%
     kable(caption = "Numeric Values for Sign-generalization test for Dehejia, Pop-Eleches and Samii (2021) in Figure A5.",
        booktabs = T,
        col.names = c("Threshold", "Decade", "Year of Census", "Income Group", "Country", "P-value"),
        digits = 3,
        format = "latex",
        longtable = TRUE) %>%
  kable_styling(latex_options = c("repeat_header", "scale_down")) %>%
  pack_rows("Outcome: Have More Kids", 1, 134) %>%
  pack_rows("Outcome: Economically Active", 135, 254),
     file = "./generated/Tables/Appendix_A5_numerics.tex")
  
```

Generate Figure A6.
```{r, fig.width = 5.25, fig.height = 2.5}
res_plot <- ggplot(res_obs %>% filter(Study == "Study Type: IV"), aes(x = threshold, y = p_value, color = Decade, shape = `Income group`)) + geom_point() + 
  ylim(c(0, 1)) + geom_hline(yintercept = 0.05, linetype = 2) + 
  theme_bw() + facet_grid(.~outcome, scales = "free_x") +
  xlab("Threshold") + ylab("Partial Conjunction p-value") +
  theme(axis.title.y = element_text(size = 10, margin = margin(t = 0, r = 10, b = 0, l = 0)),
        axis.title.x = element_text(size = 10, margin = margin(t = 10, r = 0, b = 0, l = 0)),
        legend.title=element_text(size=9)) +
  # scale_x_continuous(breaks = c(2, 4, 6, 8, 10, 12)) +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major.x = element_blank(),
        legend.position = "right",
        legend.box = "horizontal")

res_plot
ggsave("./generated/Figures/Figure_A6_BisbeeEtAl_pct_iv.pdf", width = 5.25, height = 2.5)
```

```{r numerics_figure_A6, include = FALSE, eval = TRUE}
row.names(res_obs) <- NULL
cat( "\n\n\n", 
res_obs %>% filter(Study == "Study Type: IV") %>%
       select(threshold, Decade, `Year of Census`, `Income group`, Country, p_value) %>%
     kable(caption = "Numeric Values for Sign-generalization test for Bisbee et al. (2017) in Figure A6.",
        booktabs = T,
        col.names = c("Threshold", "Decade", "Year of Census", "Income Group", "Country", "P-value"),
        digits = 3,
        format = "latex",
        longtable = TRUE) %>%
  kable_styling(latex_options = c("repeat_header", "scale_down")) %>%
  pack_rows("Outcome: Economically Active", 1, 112),
     file = "./generated/Tables/Appendix_A6_numerics.tex")
```

Summarize data.
```{r}
data %>% group_by(Country) %>%
  summarize(n = n())

res_obs %>% group_by(Study, outcome) %>%
  summarize(mean_sign_support = mean(p_value < 0.05),
            count_sign_support = sum(p_value < 0.05),
            n = n())

data %>% group_by(Study, outcome) %>%
  summarize(sum(!is.na(Estimate)))
```

Total runtime is `r round(difftime(Sys.time(), start_time, units = "mins"), 2)` minutes.
